import sys
import scanpy as sc
import anndata
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
import cell2location
import scvi
from matplotlib import rcParams
rcParams['pdf.fonttype'] = 42 # enables correct plotting of text for PDFs
print("Import done!")
Global seed set to 0
Import done!
#loading visium data
adata_vis = sc.read_visium("./data/Hamstring/Ham1/", count_file='filtered_feature_bc_matrix.h5')
adata_vis.obs['sample'] = list(adata_vis.uns['spatial'].keys())[0]
adata_vis
/opt/lramosmu/miniconda3/envs/test_scvi16_cuda113/lib/python3.9/site-packages/anndata/_core/anndata.py:1830: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.
utils.warn_names_duplicates("var")
AnnData object with n_obs × n_vars = 2981 × 36601
obs: 'in_tissue', 'array_row', 'array_col', 'sample'
var: 'gene_ids', 'feature_types', 'genome'
uns: 'spatial'
obsm: 'spatial'
sc.pl.spatial(adata=adata_vis)
adata_vis.var
| gene_ids | feature_types | genome | |
|---|---|---|---|
| MIR1302-2HG | ENSG00000243485 | Gene Expression | GRCh38 |
| FAM138A | ENSG00000237613 | Gene Expression | GRCh38 |
| OR4F5 | ENSG00000186092 | Gene Expression | GRCh38 |
| AL627309.1 | ENSG00000238009 | Gene Expression | GRCh38 |
| AL627309.3 | ENSG00000239945 | Gene Expression | GRCh38 |
| ... | ... | ... | ... |
| AC141272.1 | ENSG00000277836 | Gene Expression | GRCh38 |
| AC023491.2 | ENSG00000278633 | Gene Expression | GRCh38 |
| AC007325.1 | ENSG00000276017 | Gene Expression | GRCh38 |
| AC007325.4 | ENSG00000278817 | Gene Expression | GRCh38 |
| AC007325.2 | ENSG00000277196 | Gene Expression | GRCh38 |
36601 rows × 3 columns
adata_vis.var_names_make_unique()
# find mitochondria-encoded (MT) genes
adata_vis.var['mt'] = adata_vis.var_names.str.startswith("MT-")
# ribosomal genes
adata_vis.var['ribo'] = adata_vis.var_names.str.startswith(("RPS","RPL"))
# hemoglobin genes.
adata_vis.var['hb'] = adata_vis.var_names.str.contains(("^HB[^(P)]"))
sc.pp.calculate_qc_metrics(adata_vis, qc_vars=["mt"], inplace=True)
adata_vis.var
OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.
| gene_ids | feature_types | genome | mt | ribo | hb | n_cells_by_counts | mean_counts | log1p_mean_counts | pct_dropout_by_counts | total_counts | log1p_total_counts | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| MIR1302-2HG | ENSG00000243485 | Gene Expression | GRCh38 | False | False | False | 0 | 0.000000 | 0.000000 | 100.000000 | 0.0 | 0.000000 |
| FAM138A | ENSG00000237613 | Gene Expression | GRCh38 | False | False | False | 0 | 0.000000 | 0.000000 | 100.000000 | 0.0 | 0.000000 |
| OR4F5 | ENSG00000186092 | Gene Expression | GRCh38 | False | False | False | 0 | 0.000000 | 0.000000 | 100.000000 | 0.0 | 0.000000 |
| AL627309.1 | ENSG00000238009 | Gene Expression | GRCh38 | False | False | False | 1 | 0.001006 | 0.001006 | 99.966454 | 3.0 | 1.386294 |
| AL627309.3 | ENSG00000239945 | Gene Expression | GRCh38 | False | False | False | 0 | 0.000000 | 0.000000 | 100.000000 | 0.0 | 0.000000 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| AC141272.1 | ENSG00000277836 | Gene Expression | GRCh38 | False | False | False | 0 | 0.000000 | 0.000000 | 100.000000 | 0.0 | 0.000000 |
| AC023491.2 | ENSG00000278633 | Gene Expression | GRCh38 | False | False | False | 0 | 0.000000 | 0.000000 | 100.000000 | 0.0 | 0.000000 |
| AC007325.1 | ENSG00000276017 | Gene Expression | GRCh38 | False | False | False | 0 | 0.000000 | 0.000000 | 100.000000 | 0.0 | 0.000000 |
| AC007325.4 | ENSG00000278817 | Gene Expression | GRCh38 | False | False | False | 13 | 0.005703 | 0.005687 | 99.563905 | 17.0 | 2.890372 |
| AC007325.2 | ENSG00000277196 | Gene Expression | GRCh38 | False | False | False | 0 | 0.000000 | 0.000000 | 100.000000 | 0.0 | 0.000000 |
36601 rows × 12 columns
adata_vis
AnnData object with n_obs × n_vars = 2981 × 36601
obs: 'in_tissue', 'array_row', 'array_col', 'sample', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt'
var: 'gene_ids', 'feature_types', 'genome', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts'
uns: 'spatial'
obsm: 'spatial'
sns.jointplot(
data=adata_vis.obs,
x="log1p_total_counts",
y="log1p_n_genes_by_counts",
kind="hex",
)
<seaborn.axisgrid.JointGrid at 0x7fab3c245be0>
fig, axs = plt.subplots(1, 4, figsize=(15, 4))
sns.histplot(adata_vis.obs["total_counts"], kde=False, ax=axs[0])
sns.histplot(adata_vis.obs["total_counts"][adata_vis.obs["total_counts"] < 200], kde=False, bins=40, ax=axs[1])
sns.histplot(adata_vis.obs["n_genes_by_counts"], kde=False, bins=60, ax=axs[2])
sns.histplot(adata_vis.obs["n_genes_by_counts"][adata_vis.obs["n_genes_by_counts"] < 200], kde=False, bins=60, ax=axs[3])
<AxesSubplot:xlabel='n_genes_by_counts', ylabel='Count'>
adata_vis = adata_vis_orig
adata_vis_orig
AnnData object with n_obs × n_vars = 2818 × 36601
obs: 'in_tissue', 'array_row', 'array_col', 'sample', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'n_counts'
var: 'gene_ids', 'feature_types', 'genome', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts'
uns: 'spatial'
obsm: 'spatial'
sc.pp.filter_cells(adata_vis, min_counts=20)
#sc.pp.filter_cells(adata, max_counts=35000)
adata_vis = adata_vis[adata_vis.obs["pct_counts_mt"] < 25]
print(f"#cells after MT filter: {adata_vis.n_obs}")
sc.pp.filter_genes(adata_vis, min_cells=2)
#cells after MT filter: 2637
/opt/lramosmu/miniconda3/envs/test_scvi16_cuda113/lib/python3.9/site-packages/scanpy/preprocessing/_simple.py:251: ImplicitModificationWarning: Trying to modify attribute `.var` of view, initializing view as actual. adata.var['n_cells'] = number
sc.pl.spatial(adata_vis, img_key="hires", color='total_counts', spot_size=40, vmax=1000)
sc.pp.normalize_total(adata_vis, inplace=True)
sc.pp.log1p(adata_vis)
sc.pp.highly_variable_genes(adata_vis, flavor="seurat", n_top_genes=1000)
adata_vis
AnnData object with n_obs × n_vars = 2637 × 13134
obs: 'in_tissue', 'array_row', 'array_col', 'sample', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'n_counts'
var: 'gene_ids', 'feature_types', 'genome', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
uns: 'spatial', 'log1p', 'hvg'
obsm: 'spatial'
#sc.pp.pca(adata_vis)
#sc.pp.neighbors(adata_vis)
#sc.tl.umap(adata_vis)
sc.tl.leiden(adata_vis, resolution=0.6, key_added="clusters")
plt.rcParams["figure.figsize"] = (4, 4)
sc.pl.umap(adata_vis, color=["total_counts", "n_genes_by_counts", "clusters"], wspace=0.4)
plt.rcParams["figure.figsize"] = (8, 8)
sc.pl.spatial(adata_vis, img_key="hires", color=["total_counts", "n_genes_by_counts"], size = 1.5)
sc.pl.spatial(adata_vis, img_key="hires", color="clusters", size=1.5)
markers = ['COL1A2', 'TTN', 'COMP', 'MKX', 'PECAM1']
sc.pl.dotplot(adata_vis, markers, groupby='clusters', dendrogram=True, title="Known Cell Markers")
#code to crop part of the image. Our hires image is 2000x2000 pixels. Top left is 0,0.
#Coordinates to use for cropping the image (left, right, top, bottom)
sc.pl.spatial(adata_vis, img_key="hires", color="clusters", groups=["0","2", "1", "5"], crop_coord=[2200, 3000, 900, 1600], size=1.4, title="Muscle-like Area")
/opt/lramosmu/miniconda3/envs/test_scvi16_cuda113/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: Categorical.replace is deprecated and will be removed in a future version. Use Series.replace directly instead. values = values.replace(values.categories.difference(groups), np.nan)
sc.tl.rank_genes_groups(adata_vis, "clusters", method="t-test")
sc.pl.rank_genes_groups_heatmap(adata_vis, groups="6", n_genes=10, groupby="clusters")
sc.pl.rank_genes_groups_heatmap(adata_vis, groups="5", n_genes=10, groupby="clusters")
sc.pl.rank_genes_groups_heatmap(adata_vis, groups="4", n_genes=10, groupby="clusters")
sc.pl.rank_genes_groups_heatmap(adata_vis, groups="3", n_genes=10, groupby="clusters")
sc.pl.rank_genes_groups_heatmap(adata_vis, groups="2", n_genes=10, groupby="clusters")
sc.pl.rank_genes_groups_heatmap(adata_vis, groups="1", n_genes=10, groupby="clusters")
sc.pl.rank_genes_groups_heatmap(adata_vis, groups="0", n_genes=10, groupby="clusters")
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: 0, 1, 2, etc. var_group_labels: 6
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: 0, 1, 2, etc. var_group_labels: 5
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: 0, 1, 2, etc. var_group_labels: 4
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: 0, 1, 2, etc. var_group_labels: 3
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: 0, 1, 2, etc. var_group_labels: 2
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: 0, 1, 2, etc. var_group_labels: 1
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: 0, 1, 2, etc. var_group_labels: 0
sc.pl.spatial(adata_vis, img_key="hires", color=["TNNT1", "TNNT3"], crop_coord=[2200, 3000, 900, 1600], color_map="magma", vmax=1)
sc.pl.spatial(adata_vis, img_key="hires", color=["PECAM1", "FLT1"], size=1.5, vmax=1)
#code to crop part of the image. Our hires image is 2000x2000 pixels. Top left is 0,0.
#Coordinates to use for cropping the image (left, right, top, bottom)
sc.pl.spatial(adata_vis, img_key="hires", color=["PECAM1", "FLT1"], crop_coord=[700, 2000, 700, 1900], size=1.5, vmax=1)
#code to crop part of the image. Our hires image is 2000x2000 pixels. Top left is 0,0.
#Coordinates to use for cropping the image (left, right, top, bottom)
sc.pl.spatial(adata_vis, img_key="hires", color=["NOTCH3", "LYVE1"], crop_coord=[1000, 2000, 700, 1900], size=1.5, vmax=1)
#specific markers
sc.pl.spatial(adata_vis, img_key="hires", color=["NOTCH3"], size=1.5, vmax=1)
#specific markers
sc.pl.spatial(adata_vis, img_key="hires", color=["COL1A2", "DCN"], size=1.5, vmax=2)
#specific markers
sc.pl.spatial(adata_vis, img_key="hires", color=["PTPRC", "CD163"], size=1.5)
#specific markers
sc.pl.spatial(adata_vis, img_key="hires", color=["CD68", "CX3CR1"], size=1.5)
#specific markers
sc.pl.spatial(adata_vis, img_key="hires", color=["TRDN", "TNNT1", "TNNT3"], size=1.5, vmax=1)
sc.pl.spatial(adata_vis, img_key="hires", color=["TRDN","TNNT1", "TNNT3"], crop_coord=[2200, 3000, 900, 1600], vmax=1, size=1.3)